Reduce locked critical region in __enter_scheduler(),

author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>

Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)

committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>

Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)
diff --git a/xen/arch/ia64/xen/process.c b/xen/arch/ia64/xen/process.c

index e31e2fa38a79645e2ca1ecd45dcf082c1c47a56f..e1da875cdb5faa13b47397f52ef4da8b62a8bd53 100644 (file)
--- a/xen/arch/ia64/xen/process.c
+++ b/xen/arch/ia64/xen/process.c
@@ -65,24 +65,16 @@ long do_iopl(domid_t domain, unsigned int new_io_pl)
  
  extern struct schedule_data schedule_data[NR_CPUS];
  
-void schedule_tail(struct vcpu *next)
+void schedule_tail(struct vcpu *prev)
  {
-       unsigned long rr7;
-       //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
-       //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-
-    // This is necessary because when a new domain is started, our
-    // implementation of context_switch() does not return (switch_to() has
-    // special and peculiar behaviour in this case).
-    context_switch_done();
-
-       /* rr7 will be postponed to last point when resuming back to guest */
-    if(VMX_DOMAIN(current)){
-       vmx_load_all_rr(current);
-    }else{
-           load_region_regs(current);
-            vcpu_load_kernel_regs(current);
-    }
+       context_saved(prev);
+
+       if (VMX_DOMAIN(current)) {
+               vmx_load_all_rr(current);
+       } else {
+               load_region_regs(current);
+               vcpu_load_kernel_regs(current);
+       }
  }
  
  void tdpfoo(void) { }
diff --git a/xen/arch/ia64/xen/xenmisc.c b/xen/arch/ia64/xen/xenmisc.c

index c3605f3dee2b470b457f3b6078410ab3ddc05205..504003ca6f0f88722a24b41ec59f8b5abf8eded1 100644 (file)
--- a/xen/arch/ia64/xen/xenmisc.c
+++ b/xen/arch/ia64/xen/xenmisc.c
@@ -327,6 +327,8 @@ if (!i--) { printk("+",id); i = 1000000; }
         }
             if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
      }
+
+    context_saved(prev);
  }
  
  void continue_running(struct vcpu *same)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c

index 0dcf94366ecafc1a310981442a490ad46bd0dccb..dd79e2aad426ba1a3c562b186b0115321669d0bd 100644 (file)
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -739,7 +739,7 @@ static void __context_switch(void)
  
      if ( p->domain != n->domain )
          cpu_clear(cpu, p->domain->domain_dirty_cpumask);
-    cpu_clear(cpu, n->vcpu_dirty_cpumask);
+    cpu_clear(cpu, p->vcpu_dirty_cpumask);
  
      percpu_ctxt[cpu].curr_vcpu = n;
  }
@@ -749,17 +749,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
  {
      unsigned int cpu = smp_processor_id();
  
-    ASSERT(!local_irq_is_enabled());
-
      set_current(next);
  
      if ( (percpu_ctxt[cpu].curr_vcpu != next) &&
           !is_idle_domain(next->domain) )
      {
+        local_irq_disable();
          __context_switch();
-
-        context_switch_done();
-        ASSERT(local_irq_is_enabled());
+        local_irq_enable();
  
          if ( VMX_DOMAIN(next) )
          {
@@ -772,10 +769,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
              vmx_load_msrs(next);
          }
      }
-    else
-    {
-        context_switch_done();
-    }
+
+    context_saved(prev);
  
      schedule_tail(next);
      BUG();
diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c

index ae8277c23f2d031053c06af9668ecd47e574185f..a3c3e26637f54618f64ea31a35ac7c7536b3d4c9 100644 (file)
--- a/xen/common/sched_bvt.c
+++ b/xen/common/sched_bvt.c
@@ -277,7 +277,7 @@ static void bvt_wake(struct vcpu *v)
  
  static void bvt_sleep(struct vcpu *v)
  {
-    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+    if ( schedule_data[v->processor].curr == v )
          cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
      else  if ( __task_on_runqueue(v) )
          __del_from_runqueue(v);
@@ -409,7 +409,7 @@ static struct task_slice bvt_do_schedule(s_time_t now)
          
          __del_from_runqueue(prev);
          
-        if ( domain_runnable(prev) )
+        if ( vcpu_runnable(prev) )
              __add_to_runqueue_tail(prev);
      }
  
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c

index b09d2ac83423b2e06a69de2f75dd4b4f17000603..169c3a1e626aef4073ad4c79796771e122f0e8f0 100644 (file)
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -782,8 +782,8 @@ static struct task_slice sedf_do_schedule(s_time_t now)
   
      /* create local state of the status of the domain, in order to avoid
         inconsistent state during scheduling decisions, because data for
-       domain_runnable is not protected by the scheduling lock!*/
-    if ( !domain_runnable(current) )
+       vcpu_runnable is not protected by the scheduling lock!*/
+    if ( !vcpu_runnable(current) )
          inf->status |= SEDF_ASLEEP;
   
      if ( inf->status & SEDF_ASLEEP )
@@ -879,7 +879,7 @@ static void sedf_sleep(struct vcpu *d)
  
      EDOM_INFO(d)->status |= SEDF_ASLEEP;
   
-    if ( test_bit(_VCPUF_running, &d->vcpu_flags) )
+    if ( schedule_data[d->processor].curr == d )
      {
          cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
      }
diff --git a/xen/common/schedule.c b/xen/common/schedule.c

index 9e7b088542c54007c569aa9a8a3ab9afcd7464b1..7b5737f643560a1564a4f2c5d387c0dc2c983922 100644 (file)
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -168,7 +168,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
      unsigned long flags;
  
      spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(!domain_runnable(v)) )
+    if ( likely(!vcpu_runnable(v)) )
          SCHED_OP(sleep, v);
      spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
  
@@ -184,7 +184,7 @@ void vcpu_sleep_sync(struct vcpu *v)
       * flag is cleared and the scheduler lock is released. We also check that
       * the domain continues to be unrunnable, in case someone else wakes it.
       */
-    while ( !domain_runnable(v) &&
+    while ( !vcpu_runnable(v) &&
              (test_bit(_VCPUF_running, &v->vcpu_flags) ||
               spin_is_locked(&schedule_data[v->processor].schedule_lock)) )
          cpu_relax();
@@ -197,7 +197,7 @@ void vcpu_wake(struct vcpu *v)
      unsigned long flags;
  
      spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
-    if ( likely(domain_runnable(v)) )
+    if ( likely(vcpu_runnable(v)) )
      {
          SCHED_OP(wake, v);
          v->wokenup = NOW();
@@ -387,20 +387,18 @@ static void __enter_scheduler(void)
  {
      struct vcpu        *prev = current, *next = NULL;
      int                 cpu = smp_processor_id();
-    s_time_t            now;
+    s_time_t            now = NOW();
      struct task_slice   next_slice;
      s32                 r_time;     /* time for new dom to run */
  
+    ASSERT(!in_irq());
+
      perfc_incrc(sched_run);
-    
-    spin_lock_irq(&schedule_data[cpu].schedule_lock);
  
-    now = NOW();
+    spin_lock_irq(&schedule_data[cpu].schedule_lock);
  
      rem_ac_timer(&schedule_data[cpu].s_timer);
      
-    ASSERT(!in_irq());
-
      prev->cpu_time += now - prev->lastschd;
  
      /* get policy-specific decision on scheduling... */
@@ -408,7 +406,7 @@ static void __enter_scheduler(void)
  
      r_time = next_slice.time;
      next = next_slice.task;
-    
+
      schedule_data[cpu].curr = next;
      
      next->lastschd = now;
@@ -426,11 +424,6 @@ static void __enter_scheduler(void)
      TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
               next->domain->domain_id, now - next->wokenup, r_time);
  
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
-    set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    perfc_incrc(sched_ctx);
-
      /*
       * Logic of wokenup field in domain struct:
       * Used to calculate "waiting time", which is the time that a domain
@@ -439,7 +432,7 @@ static void __enter_scheduler(void)
       * also set here then a preempted runnable domain will get a screwed up
       * "waiting time" value next time it is scheduled.
       */
-    prev->wokenup = NOW();
+    prev->wokenup = now;
  
  #if defined(WAKE_HISTO)
      if ( !is_idle_domain(next->domain) && next->wokenup )
@@ -460,6 +453,12 @@ static void __enter_scheduler(void)
      }
  #endif
  
+    set_bit(_VCPUF_running, &next->vcpu_flags);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    perfc_incrc(sched_ctx);
+
      prev->sleep_tick = schedule_data[cpu].tick;
  
      /* Ensure that the domain has an up-to-date time base. */
@@ -474,25 +473,7 @@ static void __enter_scheduler(void)
               prev->domain->domain_id, prev->vcpu_id,
               next->domain->domain_id, next->vcpu_id);
  
-    schedule_data[cpu].context_switch_in_progress = 1;
      context_switch(prev, next);
-    if ( schedule_data[cpu].context_switch_in_progress )
-        context_switch_done();
-}
-
-void context_switch_done(void)
-{
-    unsigned int cpu = smp_processor_id();
-    ASSERT(schedule_data[cpu].context_switch_in_progress);
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-    schedule_data[cpu].context_switch_in_progress = 0;
-}
-
-/* No locking needed -- pointer comparison is safe :-) */
-int idle_cpu(int cpu)
-{
-    struct vcpu *p = schedule_data[cpu].curr;
-    return p == idle_domain[cpu];
  }
  
  
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h

index 86ca33591c1b0a9f9beb67d871815c0bce50848c..979cec874afb3f0331841ddee51713febf321deb 100644 (file)
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -18,7 +18,6 @@ struct schedule_data {
      void               *sched_priv;
      struct ac_timer     s_timer;        /* scheduling timer                */
      unsigned long       tick;           /* current periodic 'tick'         */
-    int                 context_switch_in_progress;
  #ifdef BUCKETS
      u32                 hist[BUCKETS];  /* for scheduler latency histogram */
  #endif
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h

index 298eb9506ab46d72af3accfc183371cdb8059376..bb59c178d192447dafc82963b8f76e82239ca852 100644 (file)
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -271,41 +271,28 @@ void vcpu_sleep_sync(struct vcpu *d);
  extern void sync_vcpu_execstate(struct vcpu *v);
  
  /*
- * Called by the scheduler to switch to another VCPU. On entry, although
- * VCPUF_running is no longer asserted for @prev, its context is still running
- * on the local CPU and is not committed to memory. The local scheduler lock
- * is therefore still held, and interrupts are disabled, because the local CPU
- * is in an inconsistent state.
- * 
- * The callee must ensure that the local CPU is no longer running in @prev's
- * context, and that the context is saved to memory, before returning.
- * Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
+ * Called by the scheduler to switch to another VCPU. This function must
+ * call context_saved(@prev) when the local CPU is no longer running in
+ * @prev's context, and that context is saved to memory. Alternatively, if
+ * implementing lazy context switching, it suffices to ensure that invoking
+ * sync_vcpu_execstate() will switch and commit @prev's state.
   */
  extern void context_switch(
      struct vcpu *prev, 
      struct vcpu *next);
  
  /*
- * If context_switch() does not return to the caller, or you need to perform
- * some aspects of state restoration with interrupts enabled, then you must
- * call context_switch_done() at a suitable safe point.
- * 
- * As when returning from context_switch(), the caller must ensure that the
- * local CPU is no longer running in the previous VCPU's context, and that the
- * context is saved to memory. Alternatively, if implementing lazy context
- * switching, ensure that invoking sync_vcpu_execstate() will switch and
- * commit the previous VCPU's state.
+ * As described above, context_switch() must call this function when the
+ * local CPU is no longer running in @prev's context, and @prev's context is
+ * saved to memory. Alternatively, if implementing lazy context switching,
+ * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
   */
-extern void context_switch_done(void);
+#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))
  
  /* Called by the scheduler to continue running the current VCPU. */
  extern void continue_running(
      struct vcpu *same);
  
-/* Is CPU 'cpu' idle right now? */
-int idle_cpu(int cpu);
-
  void startup_cpu_idle_loop(void);
  
  unsigned long __hypercall_create_continuation(
@@ -400,7 +387,7 @@ extern struct domain *domain_list;
  #define DOMF_debugging         (1UL<<_DOMF_debugging)
  
  
-static inline int domain_runnable(struct vcpu *v)
+static inline int vcpu_runnable(struct vcpu *v)
  {
      return ( (atomic_read(&v->pausecnt) == 0) &&
               !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) &&
author	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
	Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)
committer	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
	Sat, 7 Jan 2006 15:53:25 +0000 (16:53 +0100)
xen/arch/ia64/xen/process.c		patch \| blob \| history
xen/arch/ia64/xen/xenmisc.c		patch \| blob \| history
xen/arch/x86/domain.c		patch \| blob \| history
xen/common/sched_bvt.c		patch \| blob \| history
xen/common/sched_sedf.c		patch \| blob \| history
xen/common/schedule.c		patch \| blob \| history
xen/include/xen/sched-if.h		patch \| blob \| history
xen/include/xen/sched.h		patch \| blob \| history